import numpy as np
import pandas as pd
import geopandas as gpd
import fiona
import glob
import os
import contextily as ctx
from scipy.spatial import cKDTree
from shapely.geometry import Point, shape, LineString, MultiLineString, GeometryCollection, MultiPoint, Polygon, MultiPolygon # creating points
import json
from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map, thread_map
pd.set_option('min_rows', 30)
import sys
from importlib import reload
from util import *
import matplotlib.pyplot as plt
import multiprocessing
# from pandarallel import pandarallel # parallel operations for speed
# pandarallel.initialize(nb_workers=8, progress_bar=True)
# import swifter
tqdm.pandas()
plt.rcParams['figure.figsize'] = (16, 16)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)
gpd.options.use_pygeos
True
ls('restricted')
| name | filesize (MB) | last modified | |
|---|---|---|---|
| 0 | 2016_Unitary_Plan_operational_15Nov | 0.00 | 2021-09-02 04:46:20.562659 |
| 1 | BCs_issued_by_AUP_TLADCs_2021FEB.csv | 64.34 | 2021-09-23 04:36:31.125500 |
Total: 64.0MB
%%time
parcels = gpd.read_file('input/lds-nz-primary-parcels-CLIPPED-4326.gpkg').to_crs(4326)
parcels['geometry_polygon_4326'] = parcels.geometry.to_crs(4326)
parcels['representative_point_4326'] = parcels.representative_point()
/data/miniconda3/envs/house-upzone/lib/python3.8/site-packages/geopandas/geodataframe.py:577: RuntimeWarning: Sequential read of iterator was interrupted. Resetting iterator. This can negatively impact the performance. for feature in features_lst:
CPU times: user 1min 25s, sys: 3.54 s, total: 1min 28s Wall time: 1min 28s
parcels_sample = parcels.sample(10000)
# keep a centroid and polygon version - change between them according to need
parcels_sample['geometry_polygon_2193'] = parcels_sample.geometry.to_crs(2193)
parcels_sample['geometry_centroid_2193'] = parcels_sample.geometry_polygon_2193.centroid
parcels_sample['geometry_centroid_4326'] = parcels_sample.geometry_centroid_2193.to_crs(4326)
parcels_sample['geometry_polygon_4326'] = parcels_sample.geometry_polygon_2193.to_crs(4326)
# bounds of the parcel dataset
bounds2193 = {'x1': 1.5e6, 'x2':2e6, 'y1':5.8e6, 'y2':6.1e6}
# get a sample of non-road polygons
parcels_sample2 = parcels_sample[parcels_sample.parcel_intent != 'Road'].sample(5)
parcels.sindex
parcels_sample.sindex
parcels_sample2.sindex
<geopandas.sindex.PyGEOSSTRTreeIndex at 0x7f8c6f864f10>
parcels_sample['LINZ_parcel_ID'] = parcels_sample.id
parcels_sample['geometry'] = parcels_sample.geometry_centroid_4326
parcels_sample['LINZ_parcel_centroid_lon'] = parcels_sample.geometry.x
parcels_sample['LINZ_parcel_centroid_lat'] = parcels_sample.geometry.y
parcels_sample['geometry'] = parcels_sample.geometry_polygon_4326
parcels_sample = parcels_sample.set_crs(4326)
# all are multipolygons
print(set([type(a) for a in parcels_sample.geometry]))
{<class 'shapely.geometry.multipolygon.MultiPolygon'>}
%%time
def extract_verts(geometry):
lat = np.nan
lng = np.nan
if geometry:
coordinates = []
for polygon in geometry:
# the last point is the same as the first
coordinates.extend(polygon.exterior.coords[:-1])
lng = f"[{'; '.join([str(round(point[0], 6)) for point in coordinates])}]"
lat = f"[{'; '.join([str(round(point[1], 6)) for point in coordinates])}]"
return pd.Series([lng, lat])
parcels_sample[["LINZ_parcel_vertices_lon", "LINZ_parcel_vertices_lat"]] = parcels_sample.geometry.apply(extract_verts)
CPU times: user 6.93 s, sys: 49.1 ms, total: 6.98 s Wall time: 6.96 s
roads = parcels[parcels.parcel_intent == "Road"]
roads = roads.to_crs(4326)
%%time
roads_dissolved = roads.dissolve()
CPU times: user 30.6 s, sys: 146 ms, total: 30.7 s Wall time: 30.7 s
parcels_sample2['geometry'] = parcels_sample2.geometry_polygon_4326
parcels_sample2 = parcels_sample2.set_crs(4326)
def get_points_in_roads(geom):
"""return a list of points from the geometry that fall within roads_dissolved"""
# assume multipolygon
assert isinstance(geom, MultiPolygon), f"not implemented for geometry of type {type(geom)}"
road_points = []
# iterate over polygons
for poly in geom:
# split polygon into vertices
# the last point is the same as the first
coords = poly.exterior.coords[:-1]
pointsx = [x for x, _ in coords]
pointsy = [y for _, y in coords]
# create gdf with one row per vertex
points_gdf = gpd.GeoDataFrame(geometry=gpd.points_from_xy(pointsx, pointsy)).set_crs(4326)
# sjoin with roads, to eliminate vertices that don't intersect a road
road_points.extend(gpd.sjoin(points_gdf, roads_dissolved, op = 'intersects').geometry.values)
return road_points
def pointarray2matarrays(pointarray):
"""list of points to matlab compatible arrays of longs and lats
i.e.
[point1, point2,...] -> 'point1_x; point2_x; ...', 'point1_y; point2_y; ...'
"""
lon = [point.xy[0][0] for point in pointarray]
lat = [point.xy[1][0] for point in pointarray]
lon = f"[{'; '.join([str(round(lon, 6)) for lon in list(lon)])}]"
lat = f"[{'; '.join([str(round(lat, 6)) for lat in list(lat)])}]"
return lon, lat
# within_points = [get_points_in_roads(g) for g in parcels_sample2.geometry]
road_intersections = parcels_sample2.geometry.progress_apply(get_points_in_roads)
# split into matlab compatible longs and lats like [(longs_list, lats_list), (longs_list, lats_list),...]
road_intersections = [pointarray2matarrays(road_points) for road_points in road_intersections]
lons = [r[0] for r in road_intersections]
lats = [r[1] for r in road_intersections]
parcels_sample2['LINZ_parcel_roadvertices_lon'] = lons
parcels_sample2['LINZ_parcel_roadvertices_lat'] = lats
# example
sample = parcels_sample.iloc[[2065]]
road_points = get_points_in_roads(sample.iloc[0].geometry)
print(road_points)
ax = gpd.GeoDataFrame(geometry=road_points).plot()
sample.plot(ax=ax, alpha=0.5)
roads_dissolved.plot(ax=ax, color='red', alpha=0.5)
x1, y1, x2, y2 = sample.buffer(0.005).total_bounds
plt.xlim(x1, x2)
plt.ylim(y1, y2)
plt.show()
[<shapely.geometry.point.Point object at 0x7fa45980ed90>, <shapely.geometry.point.Point object at 0x7fa45980ea90>, <shapely.geometry.point.Point object at 0x7fa459a8a3a0>, <shapely.geometry.point.Point object at 0x7fa459a506a0>, <shapely.geometry.point.Point object at 0x7fa459a1bbe0>, <shapely.geometry.point.Point object at 0x7fa459a1b220>, <shapely.geometry.point.Point object at 0x7fa459a1b4f0>, <shapely.geometry.point.Point object at 0x7fa459a1b6a0>, <shapely.geometry.point.Point object at 0x7fa459a9c250>, <shapely.geometry.point.Point object at 0x7fa4189409a0>, <shapely.geometry.point.Point object at 0x7fa418940160>, <shapely.geometry.point.Point object at 0x7fa4599f2a90>]
<ipython-input-197-5fad70017631>:8: UserWarning: Geometry is in a geographic CRS. Results from 'buffer' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. x1, y1, x2, y2 = sample.buffer(0.005).total_bounds
For convenience/speed, 1h and 1i can be gotten simultaneously.
Note: Do 2a and 2b here first, which are needed for 1i.
What to use for joining parcels with AUP zones?
first, 2a and 2b
%%time
aup_zones = gpd.read_file('restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_BaseZone.shp')
aup_zones.sindex
# aup_zones = aup_zones.to_crs(2193)
aup_zones.sample(3)
CPU times: user 25.8 s, sys: 2.4 s, total: 28.2 s Wall time: 30.1 s
| WORKFLOWJO | last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4323 | 57767.0 | 20161111010 | None | 20160718211 | None | {2653E2A1-4629-48CA-AADC-2038EFCAC51E} | 2 | Residential | None | Everglade School | 4324 | None | None | None | None | None | None | None | 25532.898535 | 745.711232 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 18 | Residential - Mixed Housing Suburban Zone | NaN | POLYGON Z ((1768860.282 5903754.262 0.000, 176... |
| 111639 | NaN | 20161111012 | None | 20160718211 | None | {2AFF46AC-8E35-48EC-89C3-D0E18F776D78} | 7 | General | None | None | 111640 | None | None | None | None | None | None | None | 3452.179432 | 398.432755 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 27 | Road | NaN | POLYGON Z ((1757930.155 5916271.915 0.000, 175... |
| 47139 | NaN | 20161111011 | None | 20160718211 | None | {1F572D18-2901-44B3-B7FA-5B9F8F71C99A} | 5 | Coastal | None | None | 47140 | None | None | None | None | None | None | None | 0.835644 | 5.790642 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 59 | Coastal - Coastal Transition Zone | NaN | POLYGON Z ((1750889.741 5924939.607 0.000, 175... |
parcels['geometry'] = parcels.representative_point_4326
parcels = parcels.set_crs(4326)
aup_zones = aup_zones.to_crs(parcels.crs)
%%time
parcels_zoned = gpd.sjoin(parcels, aup_zones[['ZONE', 'ZONE_resol', 'geometry']], op='within').drop(columns=['index_right'])
CPU times: user 9.4 s, sys: 247 ms, total: 9.65 s Wall time: 9.67 s
# check if there was a one to one match
print(len(parcels_zoned.index.unique()))
print(len(parcels_zoned))
print(len(parcels.index.unique()))
print(len(parcels))
537095 537118 537290 537290
# 23 parcels fall have their representative point fall under multiple AUP zone polygons
# but see below, only one of these falls in two AUP zone polygons with different zone codes
np.unique(parcels_zoned.index.value_counts(), return_counts=True)
(array([1, 2]), array([537072, 23]))
%%time
def get_parcel_zones(id):
if id in parcels_zoned.index:
zoned = parcels_zoned.loc[[id]]
# filter out duplicates
return ','.join(zoned.ZONE.unique())
else:
return None
parcel_zones = process_map(get_parcel_zones, list(parcels.index), max_workers=14, chunksize=1000)
CPU times: user 2.32 s, sys: 1.49 s, total: 3.81 s Wall time: 43min 3s
# there is a one to one relationship between zones and zone names
print(len(aup_zones[['ZONE', 'ZONE_resol']].drop_duplicates().ZONE.unique()))
print(len(aup_zones[['ZONE', 'ZONE_resol']].drop_duplicates()))
zone2zone_name = {r.ZONE: r.ZONE_resol for _, r in aup_zones[['ZONE', 'ZONE_resol']].drop_duplicates().iterrows()}
50 50
# only one parcel has its representative point fall in two zones
print([z for z in parcel_zones if z is not None and ',' in z])
print(zone2zone_name['18'])
print(zone2zone_name['19'])
['18,19'] Residential - Mixed Housing Suburban Zone Residential - Single House Zone
# take the first zone where there are multiple (only one case)
parcels['LINZmatch_AUP_code'] = [z.split(',')[0] if (z is not None) else None for z in parcel_zones]
parcels['LINZmatch_AUP_name'] = [zone2zone_name[z.split(',')[0]] if (z is not None) else None for z in parcel_zones]
now do 1h & 1i (now that parcels have zones)
# need polygons, will check for touching neighbours
parcels.geometry = parcels['geometry_polygon_4326']
parcels = parcels.set_crs(4326)
%%time
# number of rows to process at once
n_rows=100
def find_neighbour_info(i):
"""find neighbours of parcels from i * n_rows to (i+1) * n_rows, then find ids and zones of those neighbouring parcels"""
neighbour_gdf = gpd.sjoin(parcels.iloc[i*n_rows:min((i+1)*n_rows, len(parcels)-1)], parcels, op='touches')
neighbour_zones = []
for idx in neighbour_gdf.index:
neighbour_ids.append(neighbour_gdf.loc[[idx]].id_right.tolist())
neighbour_zones.append(neighbour_gdf.loc[[idx]].LINZmatch_AUP_code_right.tolist())
return neighbour_ids, neighbour_zones
# each call to find_neighbours will return two lists like this: [list of neighbour ids], [list of niehgbour zones]
# The final structure will be of shape (n_chunks, 2, n_neighbours), where n_neighbours will vary between lists
parcel_neighbour_chunks = process_map(find_neighbour_info, list(range((int(np.ceil(len(parcels) / n_rows))))), max_workers=14, chunksize=10)
parcel_neighbour_chunks
parcels['LINZ_adjoining_parcel_ID'] = [ids for chunk in parcel_neighbour_chunks for ids in chunk[0]]
parcels['LINZ_parcel_sides_zones'] = [zones for chunk in parcel_neighbour_chunks for zones in chunk[1]]
Note: What name to use? 'descriptio'?
power = gpd.read_file('input/Transmission_Lines_exTRANSPOWER/Transmission_Lines.shp')
power = power.cx[bounds2193['x1']:bounds2193['x2'], bounds2193['y1']:bounds2193['y2']]
# only interested in overhead
power = power[power['type'] == 'TRANSLINE']
power.sample(3)
| OBJECTID | MXLOCATION | designvolt | status | descriptio | type | Symbol | Shape__Len | GlobalID | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 4 | ALB-HPI-A | 220 | COMMISSIONED | Albany - Huapai A | TRANSLINE | 220 TRANSLINE | 14887.805068 | 8cb81a31-2c9c-497c-a8fc-646614b9290d | MULTILINESTRING ((1737714.820 5930696.980, 173... |
| 183 | 194 | HLY-OTA-A | 220 | COMMISSIONED | Huntly - Otahuhu A | TRANSLINE | 220 TRANSLINE | 76954.696844 | 43214498-696d-470f-9a06-1592ec8eaab8 | MULTILINESTRING ((1790043.000 5842870.000, 179... |
| 83 | 85 | HEN-MPE-A | 110 | COMMISSIONED | Henderson - Maungatapere A | TRANSLINE | 110 TRANSLINE | 137361.615225 | 8c723fe3-75ae-487c-9e8f-d6431a1bc9d4 | MULTILINESTRING ((1744228.000 5921585.000, 174... |
ax = power.plot(column='designvolt', legend=True)
plt
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, crs=2193)
parcels_sample['geometry'] = parcels_sample['geometry_polygon_2193']
parcels_sample = parcels_sample.set_crs(2193)
# get dataframe associating parcel indices with overhead power lines
power_intersect = gpd.sjoin(parcels_sample, power[['descriptio', 'geometry']]).drop(columns=['index_right'])
power_intersect.index.value_counts()
377815 10
381419 3
403504 3
466685 2
2097 2
..
333894 1
329285 1
169028 1
371011 1
45696 1
Length: 100, dtype: int64
id = 381419
ax = parcels_sample_under.loc[id].plot()
bounds = parcels_sample_under.loc[id].iloc[[0]].bounds
plt.xlim((bounds.minx.values[0] - 100, bounds.maxx.values[0] + 100))
plt.ylim((bounds.miny.values[0] - 100, bounds.maxy.values[0] + 100))
power.cx[bounds.minx.values[0]:bounds.maxx.values[0], bounds.miny.values[0]:bounds.maxy.values[0]].plot(column='descriptio', ax = ax, legend=True)
<AxesSubplot:>
%%time
def get_powerlines(id):
if id in power_intersect.index:
powerlines = power_intersect.loc[[id]]
# filter out duplicates
return powerlines.descriptio.unique().tolist()
# return ','.join(powerlines.descriptio.unique())
else:
return None
parcel_powerlines = process_map(get_powerlines, list(parcels_sample.index), max_workers=14, chunksize=1000)
parcels_sample['LINZ_TRNSPWR_ohead_name'] = parcel_powerlines
parcels_sample['LINZ_TRNSPWR_ohead_indicator'] = [p is not None for p in parcel_powerlines]
CPU times: user 95.4 ms, sys: 1.45 s, total: 1.55 s Wall time: 1.86 s
!ls restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/*View[Ss]haf*.shp
restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_AucklandMuseumViewshaftContoursOverlay.shp restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_AucklandMuseumViewshaftOverlay.shp restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_DilworthTerraceHousesViewshaftContoursOverlay.shp restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_DilworthTerraceHousesViewshaftOverlay.shp restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_LocallySignificantVolcanicViewshaftsContours.shp restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_LocallySignificantVolcanicViewshafts.shp restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_RegionallySignificantVolcanicViewShaftsAndHeightSensitiveAreasOverlay.shp restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_RegionallySignificantVolcanicViewshaftsContours.shp
!ls restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/*ViewShaf*.shp
restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_RegionallySignificantVolcanicViewShaftsAndHeightSensitiveAreasOverlay.shp
viewshafts_local = gpd.read_file('restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_LocallySignificantVolcanicViewshafts.shp').to_crs(2193)
viewshafts_regional = gpd.read_file('restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_RegionallySignificantVolcanicViewShaftsAndHeightSensitiveAreasOverlay.shp').to_crs(2193)
ax = viewshafts_regional[viewshafts_regional.NAME.isin(viewshafts_local.NAME.unique())].plot(color='blue', alpha=0.6)
viewshafts_local.plot(ax=ax, color='red', alpha=0.6)
blue_patch = mpatches.Patch(color='blue', label='regionally significant viewshafts')
red_patch = mpatches.Patch(color='red', label='locally significant viewshafts')
plt.legend(handles=[red_patch, blue_patch])
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, crs=2193)
ax = viewshafts_local.plot(column='NAME', alpha=0.6, legend=True)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, crs=2193)
ax = viewshafts_regional.plot(column='NAME', alpha=0.6, legend=True)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, crs=2193)
viewshafts_museum = gpd.read_file('restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_AucklandMuseumViewshaftOverlay.shp').to_crs(2193)
viewshafts_dilworth = gpd.read_file('restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_DilworthTerraceHousesViewshaftOverlay.shp').to_crs(2193)
ax = viewshafts_museum.plot(alpha=0.6, legend=True)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, crs=2193)
ax = viewshafts_dilworth.plot(alpha=0.6, legend=True)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, crs=2193)
parcels_sample['geometry'] = parcels_sample.geometry_polygon_2193
parcels_sample = parcels_sample.set_crs(2193)
gpd.sjoin(parcels_sample, viewshafts_regional, how='left')
| id | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | geometry_centroid_4326 | geometry_polygon_4326 | geometry_centroid_2193 | geometry_polygon_2193 | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | index_right | OBJECTID | TYPE | TYPE_resol | SUBTYPE | SCHEDULE | NAME | VALIDATION | VALIDATI00 | last_edi00 | created_da | VERSIONSTA | VERSIONS00 | DocumentUR | GlobalID | SHAPE_Leng | SHAPE_Area | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 332148 | 6611744 | Section 15 SO 317214 | SO 317214 | Road | Primary | [Create] Road, Limited Access Road and State H... | North Auckland | None | 594.0 | 600.0 | MULTIPOLYGON (((1749971.336 5940738.679, 17499... | POINT (174.67774 -36.66742) | MULTIPOLYGON (((174.67817 -36.66715, 174.67822... | POINT (1749932.255 5940710.038) | MULTIPOLYGON (((1749971.336 5940738.679, 17499... | [174.678168; 174.678224; 174.677304; 174.67725... | [-36.667152; -36.667189; -36.667681; -36.66764... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 138662 | 4971220 | Lot 71 DP 4276 | DP 4276 | DCDB | Primary | None | North Auckland | NA184/150 | 612.0 | 613.0 | MULTIPOLYGON (((1755077.168 5915667.766, 17550... | POINT (174.74041 -36.89243) | MULTIPOLYGON (((174.74038 -36.89224, 174.74055... | POINT (1755079.119 5915646.345) | MULTIPOLYGON (((1755077.168 5915667.766, 17550... | [174.74038; 174.740546; 174.740433; 174.740267... | [-36.892238; -36.892272; -36.892623; -36.89258... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 276820 | 6555480 | Lot 2 DP 204937 | DP 204937 | Fee Simple Title | Primary | None | North Auckland | NA133B/442 | 475.0 | 475.0 | MULTIPOLYGON (((1771091.292 5912435.935, 17710... | POINT (174.92060 -36.91862) | MULTIPOLYGON (((174.92074 -36.91859, 174.92076... | POINT (1771078.208 5912432.399) | MULTIPOLYGON (((1771091.292 5912435.935, 17710... | [174.920744; 174.920757; 174.920695; 174.92073... | [-36.918588; -36.91862; -36.918719; -36.918737... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 31004 | 4744873 | Lot 106 DP 57498 | DP 57498 | DCDB | Primary | None | North Auckland | NA11B/1184 | 657.0 | 657.0 | MULTIPOLYGON (((1751708.537 5925603.069, 17517... | POINT (174.70078 -36.80340) | MULTIPOLYGON (((174.70061 -36.80326, 174.70078... | POINT (1751723.367 5925587.744) | MULTIPOLYGON (((1751708.537 5925603.069, 17517... | [174.700606; 174.70078; 174.700945; 174.700846... | [-36.803262; -36.803205; -36.803533; -36.80356... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 494255 | 4696478 | Lot 49 DP 71916 | DP 71916 | DCDB | Primary | None | North Auckland | NA38A/689 | 675.0 | 675.0 | MULTIPOLYGON (((1755689.311 5929951.217, 17557... | POINT (174.74443 -36.76359) | MULTIPOLYGON (((174.74432 -36.76344, 174.74465... | POINT (1755698.853 5929934.142) | MULTIPOLYGON (((1755689.311 5929951.217, 17557... | [174.744322; 174.744652; 174.744543; 174.74421... | [-36.763436; -36.763582; -36.763741; -36.76359... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 24891 | 4732360 | Lot 1 DP 189392 | DP 189392 | DCDB | Primary | None | North Auckland | NA118B/374 | 860.0 | 793.0 | MULTIPOLYGON (((1733103.033 5930453.056, 17331... | POINT (174.49143 -36.76224) | MULTIPOLYGON (((174.49127 -36.76235, 174.49127... | POINT (1733117.978 5930465.529) | MULTIPOLYGON (((1733103.033 5930453.056, 17331... | [174.491265; 174.491267; 174.491269; 174.49126... | [-36.762354; -36.762346; -36.762331; -36.76231... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 25636 | 4733918 | Lot 105 DP 59567 | DP 59567 | DCDB | Primary | None | North Auckland | NA14C/462 | 703.0 | 703.0 | MULTIPOLYGON (((1770070.630 5918154.599, 17700... | POINT (174.90802 -36.86744) | MULTIPOLYGON (((174.90801 -36.86725, 174.90821... | POINT (1770070.878 5918133.805) | MULTIPOLYGON (((1770070.630 5918154.599, 17700... | [174.908008; 174.908208; 174.908026; 174.90782... | [-36.867249; -36.867551; -36.867628; -36.86732... | 5.0 | 6.0 | 3 | Height Sensitive Areas | None | None | None | 3 | Valid and Public | 20161109010 | 20160530235 | 4 | Operative | None | {7EE3FD70-DD2B-4138-B975-6CA1301626A8} | 9595.090952 | 1.303548e+06 |
| 25636 | 4733918 | Lot 105 DP 59567 | DP 59567 | DCDB | Primary | None | North Auckland | NA14C/462 | 703.0 | 703.0 | MULTIPOLYGON (((1770070.630 5918154.599, 17700... | POINT (174.90802 -36.86744) | MULTIPOLYGON (((174.90801 -36.86725, 174.90821... | POINT (1770070.878 5918133.805) | MULTIPOLYGON (((1770070.630 5918154.599, 17700... | [174.908008; 174.908208; 174.908026; 174.90782... | [-36.867249; -36.867551; -36.867628; -36.86732... | 6.0 | 7.0 | 4 | Viewshafts | None | B6 | Browns Island | 3 | Valid and Public | 20161109010 | 20160531002 | 4 | Operative | None | {4921E0E2-AFE9-4BAA-9D17-CC5669FC65C1} | 31274.305310 | 3.767934e+07 |
| 369084 | 7503911 | Part Lot 33 DP 19195 | None | Fee Simple Title | Primary | None | North Auckland | NA46C/131 | 556.0 | 7.0 | MULTIPOLYGON (((1753477.305 5920162.246, 17534... | POINT (174.72155 -36.85201) | MULTIPOLYGON (((174.72152 -36.85200, 174.72156... | POINT (1753479.593 5920160.987) | MULTIPOLYGON (((1753477.305 5920162.246, 17534... | [174.721524; 174.721557; 174.721568; 174.721524] | [-36.852002; -36.851997; -36.85204; -36.852002] | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 334475 | 4769767 | Part Allot N35 PSH OF Ahuroa | SO 27 | DCDB | Primary | None | North Auckland | NA14D/470 | 730609.0 | 120330.0 | MULTIPOLYGON (((1743070.851 5963605.949, 17430... | POINT (174.59335 -36.46327) | MULTIPOLYGON (((174.59673 -36.46215, 174.59668... | POINT (1742765.292 5963486.442) | MULTIPOLYGON (((1743070.851 5963605.949, 17430... | [174.596732; 174.596678; 174.594612; 174.58897... | [-36.462145; -36.464002; -36.464061; -36.46422... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
10527 rows × 34 columns
def extract_viewshaft_info(
parcels_sample.geometry.iloc[0].overlaps()
<bound method BaseGeometry.overlaps of <shapely.geometry.multipolygon.MultiPolygon object at 0x7f8c3ef45fa0>>
I've included all of these as rural:
['Rural - Mixed Rural Zone',
'Rural - Rural Coastal Zone',
'Rural - Countryside Living Zone',
'Rural - Rural Production Zone',
'Rural - Rural Conservation Zone',
'Rural - Waitakere Ranges Zone',
'Rural - Waitakere Foothills Zone']
%%time
aup_zones = gpd.read_file('restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_BaseZone.shp')
aup_zones = aup_zones.to_crs(2193)
aup_zones.sample(3)
CPU times: user 31.2 s, sys: 2.21 s, total: 33.4 s Wall time: 33.3 s
| WORKFLOWJO | last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 58724 | NaN | 20161111011 | None | 20160718211 | None | {0CCC66B0-9A15-477A-BE4F-FAE9C5FA9C15} | 5 | Coastal | None | None | 58725 | None | None | None | None | None | None | None | 62.153950 | 58.346375 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 59 | Coastal - Coastal Transition Zone | NaN | POLYGON Z ((1756089.130 5973206.276 0.000, 175... |
| 103279 | NaN | 20161111012 | None | 20160718211 | None | {111B7716-6401-457F-A260-9BA18D7E0DB9} | 5 | Coastal | None | None | 103280 | None | None | None | None | None | None | None | 372.662968 | 106.669373 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 30 | Coastal - General Coastal Marine Zone | NaN | POLYGON Z ((1810661.036 5993908.378 0.000, 181... |
| 54081 | NaN | 20161111011 | None | 20160718211 | None | {CD16D435-B3A5-4C26-BFC7-E6AF32E60231} | 7 | General | None | None | 54082 | None | None | None | None | None | None | None | 41502.960080 | 1179.183755 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 26 | Strategic Transport Corridor Zone | NaN | POLYGON Z ((1737071.120 5973948.034 0.000, 173... |
rural_codes = aup_zones[aup_zones.ZONE_resol.str.lower().str.contains('rural - ', na=False)]['ZONE'].unique()
# check that each rural zone code matches with a unique rural zone name
assert all([len(aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()) == 1 for code in rural_codes])
# dictionary mapping code to names
rural_code2name = {code: aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()[0] for code in rural_codes}
aup_zones[aup_zones.ZONE_resol.isna()]
| WORKFLOWJO | last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 20853 | NaN | 20161111011 | None | 20160718211 | None | {2D83F680-9587-4D57-AF0B-CB7CA27C3D2C} | 6 | Special purpose zone | None | None | 20854 | None | None | None | None | None | None | None | 22.286945 | 72.629617 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.860 5903714.023 33.721, 17... |
| 121765 | NaN | 20161111010 | None | 20160718211 | None | {FD3E8BB4-1979-42B5-9A77-5D04AE5190AF} | 6 | Special purpose zone | None | None | 121766 | None | None | None | None | None | None | None | 4.114962 | 86.135341 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.083 5903713.690 33.721, 17... |
# 2 NAs in ZONE_resol are from a zone 58, which only has observations
aup_zones[aup_zones.ZONE == '58']
| WORKFLOWJO | last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 20853 | NaN | 20161111011 | None | 20160718211 | None | {2D83F680-9587-4D57-AF0B-CB7CA27C3D2C} | 6 | Special purpose zone | None | None | 20854 | None | None | None | None | None | None | None | 22.286945 | 72.629617 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.860 5903714.023 33.721, 17... |
| 121765 | NaN | 20161111010 | None | 20160718211 | None | {FD3E8BB4-1979-42B5-9A77-5D04AE5190AF} | 6 | Special purpose zone | None | None | 121766 | None | None | None | None | None | None | None | 4.114962 | 86.135341 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.083 5903713.690 33.721, 17... |
rural = aup_zones[aup_zones.ZONE.isin(rural_codes)]
%%time
# method 1, slightly slower: dissolve by zone
rural_by_zone = rural.dissolve(by='ZONE').reset_index(drop=False)
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for index, rural_row in rural_by_zone.iterrows():
distance_candidates.append(row.geometry.distance(rural_row.geometry))
code_candidates.append(rural_row.ZONE)
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 1min 2s, sys: 363 ms, total: 1min 3s Wall time: 1min 2s
%%time
# method 2, slightly faster: dissolve all after subsetting gpd dfs
rural_by_zone_dict = {code: rural[rural.ZONE == code].dissolve() for code in rural_codes}
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for code, rural_gdf in rural_by_zone_dict.items():
distance_candidates.append(row.geometry.distance(rural_gdf.geometry[0]))
code_candidates.append(rural_gdf.ZONE[0])
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 56.2 s, sys: 289 ms, total: 56.5 s Wall time: 56.2 s
parcels_sample['Hdist_rural'] = distances[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_rural_code'] = codes[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_rural_name'] = parcels_sample.apply(lambda x: rural_code2name[x.Hdist_rural_code], axis=1)
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
colours = ('blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'cyan')
name2colour = {name: colour for name, colour in zip(rural_code2name.values(), colours)}
column = 'Hdist_rural'
subsample = parcels_sample[parcels_sample[column] > 0].sample(5)
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
subsample = subsample[~subsample.is_empty]
# ax = rural.plot(column='ZONE_resol', legend=True)
# subsample.plot(column='Hdist_rural_name', alpha=0.4, ax=ax)
ax = rural.plot(color=[name2colour[z] for z in rural.ZONE_resol])
subsample.plot(color=[name2colour[z] for z in subsample.Hdist_rural_name], alpha=0.65, ax=ax)
plt.ylim((5.88e6, 5.96e6))
# add legend
legend_patches = [mpatches.Patch(color=col, label=name) for name, col in name2colour.items()]
plt.legend(handles=legend_patches)
ctx.add_basemap(ax=ax, crs=2193)
import matplotlib
matplotlib.colors
<module 'matplotlib.colors' from '/data/miniconda3/envs/house-upzone/lib/python3.8/site-packages/matplotlib/colors.py'>
business_codes = aup_zones[aup_zones.ZONE_resol.str.lower().str.contains('business - ', na=False)]['ZONE'].unique()
# check that each business zone code matches with a unique business zone name
assert all([len(aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()) == 1 for code in business_codes])
# dictionary mapping code to names
business_code2name = {code: aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()[0] for code in business_codes}
business_code2name
{'44': 'Business - Neighbourhood Centre Zone',
'12': 'Business - Mixed Use Zone',
'17': 'Business - Light Industry Zone',
'5': 'Business - Heavy Industry Zone',
'49': 'Business - General Business Zone',
'1': 'Business - Business Park Zone',
'22': 'Business - Town Centre Zone',
'10': 'Business - Metropolitan Centre Zone',
'7': 'Business - Local Centre Zone',
'35': 'Business - City Centre Zone'}
business = aup_zones[aup_zones.ZONE.isin(business_codes)]
%%time
# method 1, slightly slower: dissolve by zone
business_by_zone = business.dissolve(by='ZONE').reset_index(drop=False)
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for index, business_row in business_by_zone.iterrows():
distance_candidates.append(row.geometry.distance(business_row.geometry))
code_candidates.append(business_row.ZONE)
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 1min 2s, sys: 363 ms, total: 1min 3s Wall time: 1min 2s
%%time
# method 2, slightly faster: dissolve all after subsetting gpd dfs
business_by_zone_dict = {code: business[business.ZONE == code].dissolve() for code in business_codes}
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for code, business_gdf in business_by_zone_dict.items():
distance_candidates.append(row.geometry.distance(business_gdf.geometry[0]))
code_candidates.append(business_gdf.ZONE[0])
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 44.9 s, sys: 230 ms, total: 45.1 s Wall time: 44.8 s
parcels_sample['Hdist_bus'] = distances[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_bus_code'] = codes[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_bus_name'] = parcels_sample.apply(lambda x: business_code2name[x.Hdist_bus_code], axis=1)
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
colours = ('blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'cyan', 'black', 'white')
name2colour = {name: colour for name, colour in zip(business_code2name.values(), colours)}
# hard to see, subset to smaller area
bounds = {'x1': 1.76e6, 'x2': 1.7613e6, 'y1': 5.9123e6, 'y2': 5.914e6}
column = 'Hdist_bus'
subsample = parcels_sample.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
subsample = subsample[~subsample.is_empty]
business_plot = business.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
ax = business_plot.plot(color=[name2colour[z] for z in business_plot.ZONE_resol])
subsample.plot(color=[name2colour[z] for z in subsample.Hdist_bus_name], alpha=0.65, ax=ax)
# plt.ylim((5.88e6, 5.96e6))
# add legend
legend_patches = [mpatches.Patch(color=col, label=name) for name, col in name2colour.items()]
plt.legend(handles=legend_patches)
ctx.add_basemap(ax=ax, crs=2193)
/data/miniconda3/envs/house-upzone/lib/python3.8/site-packages/geopandas/geodataframe.py:1322: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super(GeoDataFrame, self).__setitem__(key, value)
resid_codes = aup_zones[aup_zones.ZONE_resol.str.lower().str.contains('resid', na=False)]['ZONE'].unique()
resid_codes
array(['60', '23', '18', '8', '19', '20'], dtype=object)
# check that each resid zone code matches with a unique resid zone name
assert all([len(aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()) == 1 for code in resid_codes])
# dictionary mapping code to names
resid_code2name = {code: aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()[0] for code in resid_codes}
resid_code2name
{'60': 'Residential - Mixed Housing Urban Zone',
'23': 'Residential - Large Lot Zone',
'18': 'Residential - Mixed Housing Suburban Zone',
'8': 'Residential - Terrace Housing and Apartment Building Zone',
'19': 'Residential - Single House Zone',
'20': 'Residential - Rural and Coastal Settlement Zone'}
resid = aup_zones[aup_zones.ZONE.isin(resid_codes)]
%%time
# method 1, slightly slower: dissolve by zone
resid_by_zone = resid.dissolve(by='ZONE').reset_index(drop=False)
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for index, resid_row in resid_by_zone.iterrows():
distance_candidates.append(row.geometry.distance(resid_row.geometry))
code_candidates.append(resid_row.ZONE)
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 1min 2s, sys: 363 ms, total: 1min 3s Wall time: 1min 2s
%%time
# method 2, slightly faster: dissolve all after subsetting gpd dfs
resid_by_zone_dict = {code: resid[resid.ZONE == code].dissolve() for code in resid_codes}
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for code, resid_gdf in resid_by_zone_dict.items():
distance_candidates.append(row.geometry.distance(resid_gdf.geometry[0]))
code_candidates.append(resid_gdf.ZONE[0])
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 2min 9s, sys: 509 ms, total: 2min 9s Wall time: 2min 8s
parcels_sample['Hdist_resid'] = distances[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_resid_code'] = codes[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_resid_name'] = parcels_sample.apply(lambda x: resid_code2name[x.Hdist_resid_code], axis=1)
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
colours = ('blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'cyan', 'black', 'white')
name2colour = {name: colour for name, colour in zip(resid_code2name.values(), colours)}
# hard to see, subset to smaller area
bounds = {'x1': 1.76e6, 'x2': 1.7613e6, 'y1': 5.9123e6, 'y2': 5.914e6}
column = 'Hdist_resid'
subsample = parcels_sample.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
subsample = subsample[~subsample.is_empty]
resid_plot = resid.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
ax = resid_plot.plot(color=[name2colour[z] for z in resid_plot.ZONE_resol])
subsample.plot(color=[name2colour[z] for z in subsample.Hdist_resid_name], alpha=0.65, ax=ax)
# plt.ylim((5.88e6, 5.96e6))
# add legend
legend_patches = [mpatches.Patch(color=col, label=name) for name, col in name2colour.items()]
plt.legend(handles=legend_patches)
ctx.add_basemap(ax=ax, crs=2193)
/data/miniconda3/envs/house-upzone/lib/python3.8/site-packages/geopandas/geodataframe.py:1322: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super(GeoDataFrame, self).__setitem__(key, value)
Note: this is the real name for i: 'Residential - Terrace Housing and Apartment Building Zone'
%%time
postfix2name = {
'SH': 'Residential - Single House Zone',
'MHS': 'Residential - Mixed Housing Suburban Zone',
'MHU': 'Residential - Mixed Housing Urban Zone',
'THA': 'Residential - Terrace Housing and Apartment Building Zone'
}
for postfix, zone in tqdm(postfix2name.items()):
resid_gdf = resid[resid.ZONE_resol == zone].dissolve()
parcels_sample[f'Hdist_{postfix}'] = parcels_sample.apply(lambda x: x.geometry.distance(resid_gdf.geometry[0]), axis=1)
CPU times: user 1min 42s, sys: 78.1 ms, total: 1min 42s Wall time: 1min 42s
postfix = 'THA'
column = f'Hdist_{postfix}'
subsample = parcels_sample.sample(10)
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
ax = subsample.plot(color='red', alpha=0.4)
resid[resid.ZONE_resol == postfix2name[postfix]].plot(ax=ax)
ctx.add_basemap(ax, crs=2193)
LA = gpd.read_file('input/Modified_Community_Boards_SHP.zip').to_crs(2193)
LA.sample(3)
| OBJECTID | Local_Area | geometry | |
|---|---|---|---|
| 8 | 10.0 | Hibiscus Coast | MULTIPOLYGON (((1752023.352 5954803.281, 17520... |
| 17 | 21.0 | Papatoetoe | POLYGON ((1765105.421 5908611.893, 1765113.661... |
| 2 | 25.0 | Rodney-Kumeu-Riverhead | POLYGON ((1742532.808 5931237.574, 1742490.377... |
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, LA[['Local_Area', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'Local_Area': 'Local_Area_name'})
sa2 = gpd.read_file('NZ-SA/statistical-area-2-2020-generalised.gdb').to_crs(2193)
sa2.sample(3)
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, sa2[['SA22020_V1_00_NAME', 'SA22020_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'SA22020_V1_00_NAME': 'SA22018_name', 'SA22020_V1_00': 'SA22018_code'})
CPU times: user 8.29 s, sys: 3.88 ms, total: 8.29 s Wall time: 8.29 s
au2013 = gpd.read_file('input/area-unit-2013.gdb.zip').to_crs(2193)
au2013.sample(3)
| AU2013_V1_00 | AU2013_V1_00_NAME | AREA_SQ_KM | LAND_AREA_SQ_KM | Shape_Length | geometry | |
|---|---|---|---|---|---|---|
| 1481 | 573903 | Newlands North | 0.804087 | 0.804087 | 5958.640337 | MULTIPOLYGON (((174.82896 -41.21949, 174.83011... |
| 958 | 525202 | Kawakawa-Orere | 105.216001 | 105.216001 | 57437.556145 | MULTIPOLYGON (((175.14259 -36.93214, 175.14265... |
| 608 | 597102 | Inland Water-Lake Ellesmere South | 63.304671 | 0.000000 | 75233.330853 | MULTIPOLYGON (((172.57398 -43.76779, 172.57401... |
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, au2013[['AU2013_V1_00_NAME', 'AU2013_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'AU2013_V1_00_NAME': 'AU2013_name', 'AU2013_V1_00': 'AU2013_code'})
CPU times: user 6.96 s, sys: 0 ns, total: 6.96 s Wall time: 6.96 s
mb2018 = gpd.read_file('input/meshblock-2018-clipped-generalised.gdb.zip').to_crs(4326)
mb2018.sample(3)
| MB2018_V1_00 | LANDWATER | LANDWATER_NAME | LAND_AREA_SQ_KM | AREA_SQ_KM | SHAPE_Length | geometry | |
|---|---|---|---|---|---|---|---|
| 52865 | 4011926 | 12 | Mainland | 0.173710 | 0.173710 | 2129.422679 | MULTIPOLYGON (((174.91945 -36.94519, 174.92053... |
| 23832 | 1429800 | 12 | Mainland | 0.021918 | 0.021918 | 731.394363 | MULTIPOLYGON (((176.91682 -39.49291, 176.91702... |
| 15554 | 0759520 | 12 | Mainland | 0.020695 | 0.020695 | 928.355735 | MULTIPOLYGON (((174.91840 -37.01411, 174.91871... |
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = gpd.sjoin(parcels_sample, mb2018[['MB2018_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'MB2018_V1_00': 'MB2018_code'})
CPU times: user 11.3 s, sys: 8.2 ms, total: 11.3 s Wall time: 11.3 s
mb2013 = gpd.read_file('input/meshblock-2013.gdb.zip').to_crs(4326)
mb2013.sample(3)
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, mb2013[['MeshblockNumber', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'MeshblockNumber': 'MB2013_code'})
CPU times: user 11.4 s, sys: 7.05 ms, total: 11.4 s Wall time: 11.4 s
For these distance calculations, use EPSG 2193 (less distortion).
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = parcels_sample.to_crs(2193)
parcels_sample.sample(3)
| id | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | geometry_centroid | geometry_polygon | SA22018_name | SA22018_code | MB2013_code | MB2018_code | AU2013_name | AU2013_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 102782 | 4896031 | Lot 1 DP 61837 | DP 61837 | DCDB | Primary | None | North Auckland | NA17C/935 | 685.0 | 685.0 | POINT (1773608.374 5914596.096) | POINT (174.94849 -36.89867) | MULTIPOLYGON (((174.94827 -36.89874, 174.94841... | Cockle Bay | 153400 | 0655102 | 0655102 | Cockle Bay | 521502 |
| 119786 | 4931702 | Lot 4 DP 166054 | DP 166054 | DCDB | Primary | None | North Auckland | NA100D/34 | 52130.0 | 50575.0 | POINT (1783027.533 5889659.615) | POINT (175.06020 -37.12153) | MULTIPOLYGON (((175.06295 -37.12105, 175.06293... | Ararimu | 166400 | 0815302 | 0815302 | Hunua | 521132 |
| 362342 | 7415715 | Lot 23 DP 455616 | DP 455616, DP 462513 | Fee Simple Title | Primary | None | North Auckland | 586776 | 103.0 | 103.0 | POINT (1763989.858 5916157.717) | POINT (174.84025 -36.88632) | MULTIPOLYGON (((174.84016 -36.88629, 174.84020... | Stonefields West | 144900 | 0465305 | 4000293 | Stonefields | 517202 |
There are a few different datasets that could be used for this:
- NZ Coastlines (Topo 1:50k) https://data.linz.govt.nz/layer/50258-nz-coastlines-topo-150k/
- NZ Coastline - mean high water https://data.linz.govt.nz/layer/105085-nz-coastline-mean-high-water/
- NZ Coastlines and Islands Polygons (Topo 1:50k) https://data.linz.govt.nz/layer/51153-nz-coastlines-and-islands-polygons-topo-150k/
The first doesn't have islands (e.g. Waiheke).
The second is probably most appropriate.
%%time
coastline = gpd.read_file('input/lds-nz-coastline-mean-high-water-FGDB.zip!nz-coastline-mean-high-water.gdb').to_crs(2193)
coastline = coastline.cx[bounds2193['x1']:bounds2193['x2'], bounds2193['y1']:bounds2193['y2']]
coastline_dissolved = coastline.dissolve()
%%time
parcels_sample['Hdist_coast'] = parcels_sample.apply(lambda x: x.geometry.distance(coastline_dissolved.geometry[0]), axis=1)
CPU times: user 14.3 s, sys: 0 ns, total: 14.3 s Wall time: 14.3 s
# if distance work, then red circles should extend to the nearest coastline, and no further
subsample = parcels_sample.sample(10)
subsample['coast_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_coast), axis=1)
subsample['geometry'] = subsample['coast_buffer']
ax = subsample.plot(color='red', alpha=0.4)
coastline.cx[1.7e6:1.8e6, 5.85e6:5.97e6].plot(ax=ax)
<AxesSubplot:>
roads = gpd.read_file('input/lds-nz-road-centrelines-topo-150k-FGDB.zip!nz-road-centrelines-topo-150k.gdb').to_crs(2193)
roads = roads.cx[bounds2193['x1']:bounds2193['x2'], bounds2193['y1']:bounds2193['y2']]
highways = roads[~roads.hway_num.isna()]
highways_dissolved = highways.dissolve()
ax = highways.plot()
ctx.add_basemap(ax, crs=2193)
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['Hdist_motorway'] = parcels_sample.apply(lambda x: x.geometry.distance(highways_dissolved.geometry[0]), axis=1)
# if distance work, then red circles should extend to the nearest coastline, and no further
subsample = parcels_sample.sample(10)
subsample['highway_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_motorway), axis=1)
subsample['geometry'] = subsample['highway_buffer']
ax = subsample.plot(color='red', alpha=0.4)
highways.plot(ax=ax)
<AxesSubplot:>
railroads = gpd.read_file('input/lds-nz-railway-centrelines-topo-150k-SHP.zip').to_crs(2193)
railroads = railroads.cx[bounds2193['x1']:bounds2193['x2'], bounds2193['y1']:bounds2193['y2']]
railroads_dissolved = railroads.dissolve()
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['Hdist_rail'] = parcels_sample.apply(lambda x: x.geometry.distance(railroads_dissolved.geometry[0]), axis=1)
# if distance work, then red circles should extend to the nearest coastline, and no further
subsample = parcels_sample.sample(10)
subsample['rail_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_rail), axis=1)
subsample['geometry'] = subsample['rail_buffer']
ax = subsample.plot(color='red', alpha=0.4)
railroads_dissolved.plot(ax=ax)
ctx.add_basemap(ax, crs=2193)
skytower = [-36.84838748948485, 174.7621736911587]
skytower = gpd.points_from_xy(x=[skytower[1]], y=[skytower[0]])
skytower = gpd.GeoDataFrame([{"name": "Skytower", "value": 1}], geometry=skytower, crs="EPSG:4326").to_crs(epsg=2193)
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['Hdist_skytower'] = parcels_sample.apply(lambda x: x.geometry.distance(skytower.geometry[0]), axis=1)
# if distance works, then red circles should extend to the nearest sky tower, and no further
subsample = parcels_sample.sample(10)
subsample['skytower_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_skytower), axis=1)
subsample['geometry'] = subsample['skytower_buffer']
ax = subsample.plot(color='red', alpha=0.2)
skytower.plot(ax=ax, color='black')
coastline.cx[1.7e6:1.8e6, 5.85e6:5.97e6].plot(ax=ax)
<AxesSubplot:>
Indicator (1 or 0) for consent located in SpHAs SpHA_indicator
Note: here I've used centroids. Maybe should use parcel polygons instead.
spha = gpd.read_file('input/AC_Special_Housing_Area.zip').to_crs(2193)
spha_dissolved = spha.dissolve()
assert(len(spha_dissolved) == 1)
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['SpHA_indicator'] = parcels_sample.apply(lambda x: spha_dissolved.geometry.contains(x.geometry), axis=1)
# if distance works, then red circles should extend to the nearest sky tower, and no further
subsample = parcels_sample.sample(500)
ax=subsample.plot(column='SpHA_indicator', alpha=0.6)
plt.ylim((5.89e6, 5.95e6))
plt.xlim((1.73e6, 1.78e6))
spha_dissolved.boundary.plot(ax=ax)
ctx.add_basemap(ax, crs=spha_dissolved.crs)